## [1] 3005 16450
Least sparse genes (most non-zeros)
Most sparse genes (most zeros)
Medium sparse genes
Singular values
Left eigenvectors (Cells)
Right eigenvectors (Genes)
Removing cells with almost no counts anywhere
## [1] 2326 700
## [,1] [,2]
## astrocytes 0.2321429 224
## endothelial 0.1574468 235
## interneurons 0.9931034 290
## microglia 0.1428571 98
## oligodendrocytes 0.7365854 820
## pyramidal CA1 0.9957401 939
## pyramidal SS 0.9924812 399
par(mfrow = c(1,2))
plot(table(dat3))
plot(table(dat3), ylim = c(0, 100))
Singular values
Left eigenvectors (Cells)
Right eigenvectors (Genes)
Modeling approximation
dat_approx <- res_svd$u[,1:2] %*% diag(res_svd$d[1:2]) %*% t(res_svd$v[,1:2])
residual <- dat3 - dat_approx
set.seed(10)
idx <- sample(prod(dim(dat3)), 5e5)
plot(as.vector(dat_approx)[idx], as.vector(residual)[idx], pch = 16, xlab = "mean",
ylab = "residual", col = rgb(0,0,0,0.1))
lines(c(-1e4, 1e4), rep(0,2), col = "red", lwd = 2)
plot(as.numeric(dat_approx)[idx], as.numeric(dat)[idx], col = rgb(0,0,0,0.1))
mean_val <- seq(0, 50, by = 0.25)
lines(mean_val, mean_val, col = "red", lwd = 2)
sd_top <- mean_val + 3*sqrt(mean_val)
sd_bot <- mean_val - 3*sqrt(mean_val)
lines(mean_val, sd_top, col = "red", lwd = 2, lty = 2)
lines(mean_val, sd_bot, col = "red", lwd = 2, lty = 2)
Singular values
Left eigenvectors (Cells)
Right eigenvectors (Genes)
Modeling approximation
dat_approx <- res_svd$u[,1:2] %*% diag(res_svd$d[1:2]) %*% t(res_svd$v[,1:2])
residual <- dat4 - dat_approx
set.seed(10)
idx <- sample(prod(dim(dat4)), 5e5)
plot(as.vector(dat_approx)[idx], as.vector(residual)[idx], pch = 16, xlab = "mean",
ylab = "residual", col = rgb(0,0,0,0.1))
lines(c(-1e4, 1e4), rep(0,2), col = "red", lwd = 2)
plot(as.numeric(dat_approx)[idx], as.numeric(dat)[idx], col = rgb(0,0,0,0.1))
mean_val <- seq(0, 50, by = 0.25)
lines(mean_val, mean_val, col = "red", lwd = 2)
sd_top <- mean_val + 3*sqrt(mean_val)
sd_bot <- mean_val - 3*sqrt(mean_val)
lines(mean_val, sd_top, col = "red", lwd = 2, lty = 2)
lines(mean_val, sd_bot, col = "red", lwd = 2, lty = 2)
Largest
Smallest
Medium
Singular values
Left eigenvectors (Cells)
Right eigenvectors (Genes)
## interneurons pyramidal SS pyramidal CA1 oligodendrocytes microglia
## 0% 0.00000000 0.00000000 0.00000000 0.000000000 0.00000000
## 25% 0.01034483 0.01253133 0.01064963 0.002439024 0.00000000
## 50% 0.03103448 0.02756892 0.03194888 0.008536585 0.00000000
## 75% 0.06551724 0.05263158 0.06922258 0.019512195 0.02040816
## 100% 0.91724138 0.69172932 0.83493078 0.841463415 0.97959184
## endothelial astrocytes
## 0% 0.000000000 0.000000000
## 25% 0.000000000 0.000000000
## 50% 0.004255319 0.008928571
## 75% 0.017021277 0.017857143
## 100% 0.672340426 0.785714286
Most distinguished for each cell type
Least distinguished
Medium distinguished
Singular values
Left eigenvectors (Cells)
Right eigenvectors (Genes)
Removing cells with almost no counts anywhere
## [1] 3005 700
par(mfrow = c(1,2))
plot(table(dat3))
plot(table(dat3), ylim = c(0, 100))
Singular values
Left eigenvectors (Cells)
Right eigenvectors (Genes)
Modeling approximation
dat_approx <- res_svd$u[,1:8] %*% diag(res_svd$d[1:8]) %*% t(res_svd$v[,1:8])
residual <- dat3 - dat_approx
set.seed(10)
idx <- sample(prod(dim(dat3)), 5e5)
plot(as.vector(dat_approx)[idx], as.vector(residual)[idx], pch = 16, xlab = "mean",
ylab = "residual", col = rgb(0,0,0,0.1))
lines(c(-1e4, 1e4), rep(0,2), col = "red", lwd = 2)
plot(as.numeric(dat_approx)[idx], as.numeric(dat)[idx], col = rgb(0,0,0,0.1))
mean_val <- seq(0, 500, by = 1)
lines(mean_val, mean_val, col = "red", lwd = 2)
sd_top <- mean_val + 3*sqrt(mean_val)
sd_bot <- mean_val - 3*sqrt(mean_val)
lines(mean_val, sd_top, col = "red", lwd = 2, lty = 2)
lines(mean_val, sd_bot, col = "red", lwd = 2, lty = 2)
Singular values
Left eigenvectors (Cells)
Right eigenvectors (Genes)
Modeling approximation
dat_approx <- res_svd$u[,1:7] %*% diag(res_svd$d[1:7]) %*% t(res_svd$v[,1:7])
residual <- dat4 - dat_approx
set.seed(10)
idx <- sample(prod(dim(dat4)), 5e5)
plot(as.vector(dat_approx)[idx], as.vector(residual)[idx], pch = 16, xlab = "mean",
ylab = "residual", col = rgb(0,0,0,0.1))
lines(c(-1e4, 1e4), rep(0,2), col = "red", lwd = 2)
plot(as.numeric(dat_approx)[idx], as.numeric(dat)[idx], col = rgb(0,0,0,0.1))
mean_val <- seq(0, 50, by = 0.25)
lines(mean_val, mean_val, col = "red", lwd = 2)
sd_top <- mean_val + 3*sqrt(mean_val)
sd_bot <- mean_val - 3*sqrt(mean_val)
lines(mean_val, sd_top, col = "red", lwd = 2, lty = 2)
lines(mean_val, sd_bot, col = "red", lwd = 2, lty = 2)